/***********************************************************************************************************************************************************************
input datasets: 
public_history_plants : establishment-level data on subsidiaries, branches, and plants of U.S. firms from the National Establishment Time-Series (NETS) ;
ccmfunda8918: CRSP/Compustat Merged fundamentals annual data downloaded from WRDS ;
Enforcement_facility: facility-level information on EPA enforcement actions (from EPA);
Enforcements:information on EPA enforcement actions (from EPA);
Epanets_caseid: EPA enforcement actions case number merged with NETS data using dunsnumber;
ccmlinktable: CRSP and Compustat linking table downloaded from WRDS ;
tnic2 and tnic3: TNIC2 and TNIC3 peers downloaded from Hoberg and Phillips's website;
tri_harmful: plants' total harmful chemical releases (generated using data from EPA's Toxics Release Inventory (TRI) database);
RSEI_Score: plants' RSEI score downloaded from EPA;
Holdings_equityfund: mutual fund stock holding information from Thomson Reuters Mutual Fund Holdings database;
msf: stock monthly return and price information;
Escore_funds_msci_2groups: mutual fund portfolio firms' escore calculated following  Hwang, Titman, and Wang (2022) ;
Contact_info: fund contact information from CRSP mutual fund database;
output datasets: peertrimf_firm0.dta and peertrimf_firm.dta (for Table 10); 
****************************************************************************************************************************************************************************/

libname epadata "X:\data";
%include "X:\sas program\winsor.sas"; 
data plants;
set epadata.Public_history_plants;
Longitude=(-1)*Longitude;
 run;
 proc sort data=plants nodupkey; by gvkey DunsNumber year; run;

Proc sql;
create table enforce_plants as select a.*, b.ACTIVITY_STATUS_DATE, b.CASE_STATUS_DATE, b.TOTAL_PENALTY_ASSESSED_AMT, b.TOTAL_COST_RECOVERY_AMT, 
b.TOTAL_COMP_ACTION_AMT, b.REGION_CODE, b.ENF_OUTCOME_CODE, b.ENF_OUTCOME_DESC
from epadata.Enforcement_facility as a, epadata.Enforcements as b
where a.ACTIVITY_ID=b.ACTIVITY_ID and a.CASE_NUMBER=b.CASE_NUMBER; quit;

 proc sort data=enforce_plants nodupkey; by ACTIVITY_ID CASE_NUMBER FACILITY_UIN ACTIVITY_STATUS_DATE; run;

proc sql;
create table enforce_plants1 as select a.*, b.DunsNumber
from enforce_plants as a, epadata.Epanets_caseid as b
where a.ACTIVITY_ID=b.ACTIVITY_ID and a.CASE_NUMBER=b.CASE_NUMBER and a.FACILITY_UIN=b.REGISTRY_ID; quit;

data enforce_plants1;
set enforce_plants1;
year=year(ACTIVITY_STATUS_DATE);
run;

 proc sort data=enforce_plants1 out=enforce_plants2 nodupkey; by DunsNumber year ACTIVITY_STATUS_DATE; run;

 proc sql;
create table enforce_plants3 as select a.*, b.ACTIVITY_ID, b.TOTAL_PENALTY_ASSESSED_AMT, b.TOTAL_COST_RECOVERY_AMT, 
b.TOTAL_COMP_ACTION_AMT, b.REGION_CODE, b.ENF_OUTCOME_CODE, b.ENF_OUTCOME_DESC, b.ACTIVITY_STATUS_DATE, b.CASE_STATUS_DATE
from plants as a join enforce_plants2 as b
on a.DunsNumber=b.DunsNumber and a.year=b.year; quit;

 proc sort data=enforce_plants3 nodupkey; by gvkey DunsNumber year ACTIVITY_STATUS_DATE; run;

data enforce_plants3;
set enforce_plants3;
   format begdate enddate date9.;
begdate= mdy(1,1,year);
enddate= mdy(12,31,year);
   run;

proc sql;
            create table enforce_plants4
            as select a.*, b.lpermno
            from enforce_plants3 as a, epadata.ccmlinktable as b
            where a.gvkey = b.gvkey and
            b.LINKTYPE in ("LU","LC","LD","LF","LN","LO","LS","LX")
            and      (b.LINKDT<=a.ACTIVITY_STATUS_DATE) and (a.ACTIVITY_STATUS_DATE<=b.LINKENDDT or b.LINKENDDT=.E);
quit;

data enforce_plants4;
set enforce_plants4;
keep gvkey DunsNumber year ACTIVITY_ID TOTAL_PENALTY_ASSESSED_AMT TOTAL_COST_RECOVERY_AMT TOTAL_COMP_ACTION_AMT REGION_CODE
ENF_OUTCOME_CODE ENF_OUTCOME_DESC ACTIVITY_STATUS_DATE CASE_STATUS_DATE plant_zip plant_state FIPsCounty Latitude Longitude plant_NAICS plant_sic2 plant_sic2 
emp PayDexMin EstType LPERMNO ; 
run;

data enforce_plants4;
set enforce_plants4;
gvkey1=gvkey*1;
run;

/* match to peers*/

proc sql;
	create table tnic1a as
	select distinct a.*, b.gvkey2 as gvkey2_tnic3
	from epadata.tnic2 as a left join epadata.tnic3 as b
	on a.gvkey1 = b.gvkey1 and a.gvkey2=b.gvkey2 and a.year=b.year;
quit;

data tnic3;
set tnic1a;
if gvkey1^=gvkey2;
if gvkey2 = gvkey2_tnic3 then tnic3 = 1;
else tnic3 = 0;
drop score;
run;

proc sql;
create table peer1 as select distinct a.*, b.*
from tnic3 as a, enforce_plants4 as b
where a.gvkey1=b.gvkey1 and a.year=b.year;
quit;

data peer2;
set peer1;
keep gvkey2 gvkey1 gvkey2_tnic3 tnic3 gvkey DunsNumber year Latitude Longitude;
run;

data plants;
set plants;
gvkey1=gvkey*1;
run;
/* match to tnic2 peers' plants*/
proc sql;
create table peer3 as select distinct a.*, b.DunsNumber as DunsNumber2, b.Latitude as Latitude2, b.Longitude as Longitude2
from peer2 as a, plants  as b
where a.gvkey2=b.gvkey1 and a.year=b.year; quit;

/* calculate distance*/
data peer3;
set peer3;
distance=geodist(Latitude,Longitude,Latitude2,Longitude2, 'M');
run;

data nets_tri0;
 set epadata.Tri_harmful;
 gvkey2=gvkey*1;
 run;
proc sql;
create table nets_tri1 as select a.*, b.RSEI_Score, b.RSEI_Score_Cancer, b.RSEI_Score_Noncancer
from nets_tri0 as a left join epadata.RSEI_Score as b
on a.TRIFD=b.TRI_Facility_ID and a.year=b.Submission_Year; quit;

data tri_prod;
set epadata.Tri_chemical_plants_prod_ratio;
run;

proc sql;
create table tri_prod1 as select distinct a.gvkey, a.DunsNumber, a.year, mean(production_ratio*harmful) as prod_ratio_harm, mean(production_ratio) as prod_ratio
from tri_prod as a
group by gvkey, DunsNumber, year; quit;

proc sql;
create table nets_tri as select a.*, b.prod_ratio, b.prod_ratio_harm
from nets_tri1 as a left join tri_prod1 as b
on a.gvkey=b.gvkey and a.DunsNumber=b.DunsNumber and a.year=b.year; quit;

proc sql;
create table tri1 as select distinct a.*
from peer3 as a, nets_tri  as b
where a.gvkey2=b.gvkey2 and a.DunsNumber2=b.DunsNumber and a.year=b.year; quit;
/*each year, only keep one obs for a plant with the shortest distance*/

proc sort data=tri1 nodupkey; by gvkey2 DunsNumber2 year DunsNumber; run;

data tri1;
set tri1;
closetemp = 0;
if distance <= 100 then closetemp = 1;
run;

proc sql;
 create table fundcontact as select a.*, b.wficn, b.fund_name
 from epadata.Contact_info as a, epadata.Crsp_fundno_wficn as b
 where a.CRSP_FUNDNO=b.CRSP_FUNDNO; quit;

data fundcontact;
set fundcontact;
zip1=ZIP*1;
run;

proc sql;
create table fundcontact1 as select a.*, b.Y as Latitude1, b.X as Longitude1
from fundcontact as a, epadata.zip_city as b
where a.zip1=b.zip; quit;

proc sql;
create table fundcontact2 as select a.*, b.state_fips
from fundcontact1 as a, epadata.state_fips as b
where a.state=b.state; quit;


proc sql;
create table mfholding as select a.*, b.ZIP, b.Latitude1, b.Longitude1, b.fund_name, b.CRSP_FUNDNO, b.state_fips
from epadata.Holdings_equityfund  as a, fundcontact2 as b
where a.wficn=b.wficn and a.fdate>=b.CHGDT and a.fdate<=b.CHGENDDT; quit;

proc sort data=mfholding nodupkey; by wficn rdate PERMNO ZIP; run;

proc sql;
create table mfholding1 as select a.*, b.SR_funds
from mfholding as a, epadata.Escore_funds_msci_2groups as b
where a.wficn=b.wficn and year(a.rdate)=b.year; quit;

/*each fund-firm, keep the last obs for each year */
data mfholding1;
set mfholding1;
year=year(rdate);
run;

proc sort data=mfholding1 ; by wficn PERMNO year rdate; run;

proc sql;
            create table mfholding2
            as select a.*, b.gvkey
            from mfholding1 as a, epadata.ccmlinktable as b
            where a.permno = b.lpermno and
            b.LINKTYPE in ("LU","LC","LD","LF","LN","LO","LS","LX")
            and      (b.LINKDT<=a.rdate) and (a.rdate<=b.LINKENDDT or b.LINKENDDT=.E);
quit;

data mfholding2;
set mfholding2;
gvkey1=gvkey*1;
run;

data mfholding3;
set mfholding2;
if SR_funds=1;
run;

proc sort data=mfholding3 ; by wficn PERMNO year rdate; run;
data mfholding3b;
set mfholding3;
by wficn PERMNO year rdate ;
if last.year;
run;

proc sort data=mfholding3b; by wficn PERMNO year; run;
data mfholding3b;
set mfholding3b;
by wficn PERMNO year; 
lag_shares_adj=lag(shares_adj);
if first.PERMNO then lag_shares_adj=.;
run;

data shares1;
set epadata.msf;
if PRC^=.; 
PRC=abs(PRC);
if CFACPR^=0 then adjprc=PRC/CFACPR;
if CFACPR=0 or CFACPR=. then adjprc=PRC;
if CFACSHR^=0 then adjSHROUT=SHROUT*CFACSHR;
if CFACPR=0 or CFACPR=. then adjSHROUT=SHROUT;
year=year(date);
keep permno date year adjshrout shrout;
proc sort nodupkey; by permno year date;
run;

proc sql;
	create table mfholding4 as 
	select distinct a.*, b.adjshrout, a.shares_adj/(b.adjshrout*1000) as pctshares
	from mfholding3b as a, shares1 as b
	where a.permno = b.permno and a.year=year(b.date) and month(a.rdate) = month(b.date);
quit;

data mfholding4;
	set mfholding4;
	if pctshares < 0 then pctshares = 0;
	if pctshares > 1 then pctshares = 1;
	if pctshares = . then pctshares = 0;
run;

proc sql;
create table trimf1 as select a.*, b.wficn as wficn2, b.rdate as rdate2, b.Latitude1 as Latitude2mf, b.Longitude1 as Longitude2mf, b.shares_orginal, b.shares_adj, b.pctshares, b.lag_shares_adj
from tri1 as a left join mfholding4 as b
on a.gvkey2=b.gvkey1 and a.year=b.year; quit;


data trimf1;
set trimf1;
distance2mf=geodist(Latitude2mf,Longitude2mf,Latitude2,Longitude2, 'M');
distance1mf=geodist(Latitude2mf,Longitude2mf,Latitude,Longitude, 'M');
run;

data trimf1;
set trimf1;
if pctshares=. then pctshares=0;
closemf0 = 0;
if distance2mf <=100 and distance2mf^=.  and distance1mf<=100 and distance1mf^=. and pctshares>0.005 and  lag_shares_adj^=. then closemf0 = 1;
closemf1 = 0;
if distance1mf<=200 and distance1mf>100  and distance1mf^=. and distance2mf<=200 and distance2mf>100 and pctshares>0.005 and  lag_shares_adj^=. then closemf1 = 1; /*close to enforced firm only*/
closemf2 = 0;
if distance1mf<=200 and distance1mf>150  and distance1mf^=. and distance2mf<=200 and distance2mf>150 and pctshares>0.005 and  lag_shares_adj^=. then closemf2 = 1; /*close to enforced firm only*/
closemf3 = 0;
if  distance1mf>200  and distance1mf^=. and distance2mf>200  and distance2mf^=. and pctshares>0.005 and  lag_shares_adj^=. then closemf3 = 1; 
run;

proc sort data=trimf1 nodupkey; by  gvkey2 DunsNumber2  gvkey1 DunsNumber wficn2 year; run;

proc sql;
create table trimf3 as select distinct a.gvkey2, a.DunsNumber2, a.year, a.tnic3, a.closetemp, sum(closemf0) as closemfcount, 
sum(closemf1) as closemfcount1, sum(closemf2) as closemfcount2, sum(closemf3) as closemfcount3
from trimf1 as a
group by gvkey2, DunsNumber2,  year, tnic3, closetemp; quit;

data trimf3;
set trimf3;
closemf=0;
if closemfcount>0 and closemfcount^=. then closemf=1;
closemf1=0;
if closemfcount1>0 and closemfcount1^=. then closemf1=1;
closemf2=0;
if closemfcount2>0 and closemfcount2^=. then closemf2=1;
closemf3=0;
if closemfcount3>0 and closemfcount3^=. then closemf3=1;
run;
data trimf3;
set trimf3;
if closemf=1 then closemf1=0;
if closemf=1 then closemf2=0;
if closemf=1 then closemf3=0;
if closemf1=1 then closemf2=0;
if closemf2=1 then closemf3=0;
if closemf1=1 then closemf3=0;
run;

proc sort data=trimf3; by gvkey2 DunsNumber2 year tnic3 closetemp; run;

data tri2;
set trimf3;
by gvkey2 DunsNumber2 year tnic3 closetemp; 
if last.tnic3;
run;

proc sort data=tri2; by gvkey2 DunsNumber2 year descending tnic3; run;
proc sort data=tri2 out=tri3 nodupkey; by gvkey2 DunsNumber2 year; run;

data tri3;
set tri3;
rename tnic3=treatment;
close = 0;
if closetemp =1  then close = 1;
run;

/*get all firm-year from NETS*/
proc sort data=nets_tri out=firms nodupkey; by gvkey DunsNumber year; run;
proc sort data=tri3 out=firms1 nodupkey; by gvkey2 DunsNumber2; run;
proc sql;
create table firms2 as select a.*
from firms as a, firms1 as b
where a.gvkey2=b.gvkey2 and a.DunsNumber=b.DunsNumber2; quit;

proc sql;
create table tri4 as select a.gvkey, a.year, a.onsite_release, a.offsite_release, a.total_releases, a.RSEI_Score, a.RSEI_Score_Cancer, a.RSEI_Score_Noncancer, 
a.prod_ratio, a.prod_ratio_harm, a.DunsNumber, b.year as event_year, b.*
from firms2 as a left join tri3 as b
on a.gvkey2=b.gvkey2 and a.DunsNumber=b.DunsNumber2 and a.year>b.year and a.year<=b.year+3; quit;

data tri4; 
set tri4;
drop gvkey1;
run;

data tri4; 
set tri4;
post=0;
if event_year^=. then post=1;
if treatment=. then treatment=0;
if close=. then close=0;
if closemf=. then closemf=0;
if closemf1=. then closemf1=0;
if closemf2=. then closemf2=0;
if closemf3=. then closemf3=0;
run;

data tri5;
set tri4;
treat_close_post=treatment*close*post;
treat_close_post_closemf=treatment*close*post*closemf;
treat_close_post_closemf1=treatment*close*post*closemf1;
treat_close_post_closemf2=treatment*close*post*closemf2;
treat_close_post_closemf3=treatment*close*post*closemf3;
run;

proc sort data=tri5; by gvkey DunsNumber year descending treat_close_post; run;
proc sort data=tri5 nodupkey; by gvkey DunsNumber year ; run;

/*match firm controls*/
proc sql;
 create table ccmfunda as select a.*, b.invt
 from epadata.ccmfunda8918 as a, epadata.invt8918 as b
 where a.gvkey=b.gvkey and a.lpermno=b.lpermno and a.fyear=b.fyear and a.datadate=b.datadate; quit;
proc sort data=ccmfunda nodupkey; by gvkey fyear; run;
data ccmfunda1; set ccmfunda;
by gvkey fyear;
AT_1=lag(AT);
if first.gvkey then AT_1=.;
sale_1=lag(sale);
if first.gvkey then sale_1=.;
run;

data ccmfunda2;
set ccmfunda1;
gvkey1=gvkey*1;
BLEV=(DLTT+DLC)/AT;
MLEV=(DLTT+DLC)/(DLTT+DLC+CSHO*abs(PRCC_F));
LEV=(DLTT+DLC)/(AT-CEQ+abs(PRCC_F)*CSHO);
sale_growth=(sale-sale_1)/sale_1;
SIZE=log(abs(PRCC_F)*CSHO);
BM=CEQ/(abs(PRCC_F)*CSHO);
ROA=OIBDP/AT;
lnSale=log(sale);
MB=(abs(PRCC_F)*CSHO)/CEQ;
ROA=OIBDP/AT;
RDS=XRD/Sale;
ADS=XAD/Sale;
TobinQ=(abs(PRCC_F)*CSHO+DLTT+DLC)/AT;
ATN=PPENT/AT;
Cash=CHE/AT_1;
CF=(IB+DP)/AT_1;
Inventory=invt/sale_1;
run;

proc sql;
create table tri6 as select a.gvkey, a.year, a.close, a.treatment, a.treat_close_post, a.post, a.DunsNumber, a.onsite_release, a.offsite_release, a.total_releases, a.event_year, 
a.RSEI_Score, a.RSEI_Score_Cancer, a.RSEI_Score_Noncancer, a.prod_ratio, a.prod_ratio_harm, 
a.treat_close_post_closemf, a.closemf, a.treat_close_post_closemf1, a.treat_close_post_closemf2, a.treat_close_post_closemf3,
b.gvkey1, b.lpermno, b.sic, b.sale_growth, b.size, b.bm, b.roa, b.mlev, b.blev, b.LEV, b.TobinQ, b.atn, b.lnsale, b.COGS, b.Inventory
from tri5 as a left join ccmfunda2 as b
on a.gvkey=b.gvkey and a.year=b.fyear; quit;

proc sort data=enforce_plants4 out=enforce_firm nodupkey; by gvkey DunsNumber year; run;

proc sql;
create table tri7 as select a.*, b.year as enforce_year
from tri6 as a left join enforce_firm as b
on a.gvkey=b.gvkey and a.DunsNumber=b.DunsNumber and a.year>b.year and a.year<=b.year+3; quit;

data tri7;
set tri7;
self_enforce=0;
if enforce_year^=. then self_enforce=1; run;
proc sort data=tri7; by gvkey DunsNumber year descending self_enforce; run;
proc sort data=tri7 nodupkey; by gvkey DunsNumber year; run;

data tri8;
set tri7;
sic3=int(sic/10);
sic2=int(sic/100);
if  SIC<6000 or SIC>6999;
if year<=2015;
run;

proc sql;
create table tri91a as select a.*, sum(onsite_release) as sum_onsite, sum(treat_close_post) as sum_treat_close_post, sum(self_enforce) as sum_self_enforce,
sum(treat_close_post_closemf) as sum_treat_close_post_closemf, sum(RSEI_Score) as sum_rsei, avg(RSEI_Score) as avg_rsei
from tri8 as a
group by gvkey, year; quit;

data tri91a;
set tri91a;
drop onsite_release treat_close_post self_enforce treat_close_post_closemf; run;
data tri91a;
set tri91a;
rename sum_onsite=onsite_release;
rename sum_self_enforce=self_enforce;
treat_close_post=0;
if sum_treat_close_post>0 then treat_close_post=1;
treat_close_post_closemf=0;
if sum_treat_close_post_closemf>0 then treat_close_post_closemf=1;
if self_enforce=. then self_enforce=0;
run;

proc sort data=tri91a out=tri91b nodupkey; by gvkey year; run;


data event;
set tri91b;
if event_year^=. and treat_close_post=1; run;

proc sort data=event nodupkey; by gvkey  event_year ; run;
data event;
set event;
by gvkey  event_year ;
firstevent=0;
if first.gvkey then firstevent=1;
run;


proc sql;
create table tri91c as select a.*, b.event_year as event_year610, b.firstevent
from tri91b as a left join event as b
on a.gvkey=b.gvkey; quit;

data tri91d;
set tri91c;
pre_treat=0;
if treat_close_post=0 and event_year610^=. and year-event_year610>3 then pre_treat=1;
run;
proc sort data=tri91d; by gvkey  year treat_close_post descending pre_treat; run;
proc sort data=tri91d nodupkey; by gvkey  year treat_close_post; run;
data tri91d;
set tri91d;
ln_onsite=log(1+onsite_release);
ln_offsite=log(1+offsite_release);
ln_totaltri=log(1+total_releases);
run;
%winsor(dsetin=tri91d, dsetout=peertri, byvar=none, vars= avg_rsei ln_onsite  ln_offsite ln_totaltri size bm roa lev   lnsale, pctl=1  99);	

data peertri;
set peertri;
onsite1=onsite_release;
offsite1=offsite_release;
totaltri1=total_releases;
run;

proc sql;
	create table p1a as
	select *, mean(onsite1) as avgonsite1, std(onsite1) as sdonsite1, 
	mean(offsite1) as avgoffsite1, std(offsite1) as sdoffsite1, 
	mean(totaltri1) as avgtotaltri1, std(totaltri1) as sdtotaltri1
	from peertri
	group by sic2, year;
quit;

data p1b;
	set p1a;
	sdonsite = (onsite1-avgonsite1)/sdonsite1;
	sdoffsite = (offsite1-avgoffsite1)/sdoffsite1;
	sdtotaltri = (totaltri1-avgtotaltri1)/sdtotaltri1;
run;

data p1d;
set p1b;
if self_enforce=1 then delete;
 run;

 
 data treat;
 set p1d;
 if treat_close_post=1;
 run;
proc sort data=treat out=cohort nodupkey; by event_year; run;
data cohort;
set cohort;
if event_year>=2003 and event_year<=2012;
keep event_year;
run;


 data cohort1;
 set cohort;
 by event_year; 
Cohort=_N_;
 run;

proc sort data=treat out=treat1 nodupkey; by gvkey  event_year; run;

proc sql;
create table treat2 as select a.*, b.Cohort
from treat1 as a, cohort1 as b
where a.event_year=b.event_year; quit;

proc sql;
create table treat3 as select a.*, b.Cohort, b.event_year as event_year1
from p1d as a, treat2 as b
where a.gvkey=b.gvkey and a.year>=b.event_year-3 and a.year<=b.event_year+3; quit;

proc sort data=treat3 nodupkey; by gvkey  year cohort; run;

data treat3;
set treat3;
if year<event_year1 then post1=0;
if year>event_year1 then post1=1;
run;


data control;
set p1d;
if treat_close_post=0;
if pre_treat=0;
run;

proc sql;
create table control1 as select a.*, b.event_year as event_year1
from control as a left join treat2 as b
on a.gvkey=b.gvkey  and a.year>=b.event_year-6 and a.year<=b.event_year+6; quit; 

data control2;
set control1;
if event_year1^=. then delete;
run;

data control2;
set control2;
drop event_year1;
run;

proc sql;
create table control3 as select a.*, b.cohort, b.event_year as event_year1
from control2 as a, cohort1 as b
where a.year>=b.event_year-3 and a.year<=b.event_year+3; quit;

proc sort data=control3 nodupkey; by gvkey  year cohort; run;
data control3;
set control3;
if year<event_year1 then post1=0;
if year>event_year1 then post1=1;
treat1=0;
run;
data treat3;
set treat3;
treat1=1;
run;

data p1e;
set treat3 control3;
if post1^=.;
run;

data p1e;
set p1e;
treat_post1=treat1*post1;
lnrsei_score=log(avg_rsei+1); run;
 PROC EXPORT DATA=p1e
            FILE="X:\Project_EPA\data\peertrimf_firm0.dta"
            DBMS=STATA REPLACE;
RUN;



proc sort data=nets_tri out=firms nodupkey; by gvkey DunsNumber year; run;
proc sort data=tri3 out=firms1 nodupkey; by gvkey2 DunsNumber2; run;
proc sql;
create table firms2 as select a.*
from firms as a, firms1 as b
where a.gvkey2=b.gvkey2 and a.DunsNumber=b.DunsNumber2; quit;

data temp0;
set tri3;
if treatment=1;
if close=1;
run;

proc sql;
create table tri4a as select a.gvkey, a.gvkey2, a.year, a.onsite_release, a.offsite_release, a.total_releases, a.RSEI_Score, a.RSEI_Score_Cancer, a.RSEI_Score_Noncancer, 
a.prod_ratio, a.prod_ratio_harm, a.DunsNumber, b.year as event_year13 
from firms2 as a left join temp0 as b
on a.gvkey2=b.gvkey2 and a.DunsNumber=b.DunsNumber2 and a.year>b.year and a.year<=b.year+3 ; quit;

proc sql;
create table tri4b as select a.*, b.year as event_year, b.*
from tri4a as a left join tri3 as b
on a.gvkey2=b.gvkey2 and a.DunsNumber=b.DunsNumber2 and a.year> b.year+5 and a.year<=b.year+10; quit;

proc sort data=tri4b; by gvkey DunsNumber year event_year descending event_year13; run;
proc sort data=tri4b nodupkey; by gvkey DunsNumber year event_year; run;

data tri4; 
set tri4b;
post=0;
if event_year^=. then post=1;
post13 = 0;
if event_year13 ~= . and  year - event_year13 <= 3 and  year - event_year13 >= 0 then post13 = 1;
if treatment=. then treatment=0;
if close=. then close=0;
if close2=. then close2=0;
if close3=. then close3=0;
run;

data tri4; 
set tri4;
drop gvkey1;
run;

data tri4; 
set tri4;
post=0;
if event_year^=. then post=1;
if treatment=. then treatment=0;
if close=. then close=0;
if closemf=. then closemf=0;
if closemf1=. then closemf1=0;
if closemf2=. then closemf2=0;
if closemf3=. then closemf3=0;
run;

data tri5;
set tri4;
treat_close_post13=treatment*close*post13;
treat_close_post=treatment*close*post;
treat_close_post_closemf=treatment*close*post*closemf;
treat_close_post_closemf1=treatment*close*post*closemf1;
treat_close_post_closemf2=treatment*close*post*closemf2;
treat_close_post_closemf3=treatment*close*post*closemf3;
run;


proc sort data=tri5; by gvkey DunsNumber year descending treat_close_post; run;
proc sort data=tri5 nodupkey; by gvkey DunsNumber year ; run;
proc sql;
create table tri6 as select a.gvkey, a.year, a.close, a.treatment, a.treat_close_post, a.post, a.DunsNumber, a.onsite_release, a.offsite_release, a.total_releases, a.treat_close_post13,
a.event_year, a.post13, a.RSEI_Score, a.RSEI_Score_Cancer, a.RSEI_Score_Noncancer, a.prod_ratio, a.prod_ratio_harm, 
a.treat_close_post_closemf, a.treat_close_post_closemf1, a.treat_close_post_closemf2, a.treat_close_post_closemf3,
b.gvkey1, b.lpermno, b.sic, b.sale_growth, b.size, b.bm, b.roa, b.mlev, b.blev, b.LEV, b.TobinQ, b.atn, b.lnsale, b.COGS, b.Inventory, b.invt, b.inflex1, b.avg_inflex1, b.qfc
from tri5 as a left join ccmfunda4 as b
on a.gvkey=b.gvkey and a.year=b.fyear; quit;

proc sort data=enforce_plants4 out=enforce_firm nodupkey; by gvkey DunsNumber year; run;

proc sql;
create table tri7 as select a.*, b.year as enforce_year
from tri6 as a left join enforce_firm as b
on a.gvkey=b.gvkey and a.DunsNumber=b.DunsNumber and a.year>b.year and a.year<=b.year+3; quit;


data tri7;
set tri7;
self_enforce=0;
if enforce_year^=. then self_enforce=1; run;

proc sort data=tri7; by gvkey DunsNumber year descending self_enforce; run;
proc sort data=tri7 nodupkey; by gvkey DunsNumber year; run;

data tri91;
set tri7;
sic3=int(sic/10);
sic2=int(sic/100);
if  SIC<6000 or SIC>6999;
if year<=2015;
run;

proc sql;
create table tri91a as select a.*, sum(onsite_release) as sum_onsite, sum(treat_close_post) as sum_treat_close_post, sum(self_enforce) as sum_self_enforce,
sum(treat_close_post_closemf) as sum_treat_close_post_closemf, sum(treat_close_post_closemf1) as sum_treat_close_post_closemf1,
sum(treat_close_post_closemf3) as sum_treat_close_post_closemf3, sum(treat_close_post13) as sum_treat_close_post13, 
sum(RSEI_Score) as sum_rsei, avg(RSEI_Score) as avg_rsei
from tri91 as a
group by gvkey, year; quit;

data tri91a;
set tri91a;
drop onsite_release treat_close_post self_enforce treat_close_post_closemf treat_close_post_closemf1 treat_close_post_closemf3; run;
data tri91a;
set tri91a;
rename sum_onsite=onsite_release;
rename sum_self_enforce=self_enforce;
treat_close_post=0;
if sum_treat_close_post>0 then treat_close_post=1;
treat_close_post_closemf=0;
if sum_treat_close_post_closemf>0 then treat_close_post_closemf=1;
treat_close_post_closemf1=0;
if sum_treat_close_post_closemf1>0 then treat_close_post_closemf1=1;
treat_close_post_closemf3=0;
if sum_treat_close_post_closemf3>0 then treat_close_post_closemf3=1;
treat_close_post13=0;
if sum_treat_close_post13>0 then treat_close_post13=1;
run;

data tri91b;
set tri91a;
if self_enforce>0 and self_enforce^=.  then delete;
run;
proc sort data=tri91b nodupkey; by gvkey year; run;

data event;
set tri91b;
if event_year^=. and treat_close_post=1; run;

proc sort data=event nodupkey; by gvkey  event_year ; run;
data event;
set event;
by gvkey  event_year ;
firstevent=0;
if first.gvkey then firstevent=1;
run;

proc sql;
create table tri91c as select a.*, b.event_year as event_year610, b.firstevent
from tri91b as a left join event as b
on a.gvkey=b.gvkey; quit;

data tri91d;
set tri91c;
pre_treat=0;
if treatfirm_post=0 and event_year610^=. and year-event_year610>3 then pre_treat=1;
run;
proc sort data=tri91d; by gvkey  year treat_close_post descending pre_treat; run;
proc sort data=tri91d nodupkey; by gvkey  year treat_close_post; run;

data peertri;
set peertri;
onsite1=onsite_release;
offsite1=offsite_release;
totaltri1=total_releases;
run;

proc sql;
	create table p1a as
	select *, mean(onsite1) as avgonsite1, std(onsite1) as sdonsite1,
	mean(offsite1) as avgoffsite1, std(offsite1) as sdoffsite1,
	mean(totaltri1) as avgtotaltri1, std(totaltri1) as sdtotaltri1
	from peertri
	group by sic2, year;
quit;

data p1b;
	set p1a;
	sdonsite = (onsite1-avgonsite1)/sdonsite1;
	sdoffsite = (offsite1-avgoffsite1)/sdoffsite1;
	sdtotaltri = (totaltri1-avgtotaltri1)/sdtotaltri1;
run;

data p1d;
set p1b;
if self_enforce>0 then delete;
if post13=1 then delete;
 run;

 data treat;
 set p1d;
 if treat_close_post=1;
 run;
proc sort data=treat out=cohort nodupkey; by event_year; run;
data cohort;
set cohort;
if event_year>=2003 and event_year<=2012;
keep event_year;
run;

 data cohort1;
 set cohort;
 by event_year; 
Cohort=_N_;
 run;

proc sort data=treat out=treat1 nodupkey; by gvkey  event_year; run;

proc sql;
create table treat2 as select a.*, b.Cohort
from treat1 as a, cohort1 as b
where a.event_year=b.event_year; quit;

proc sql;
create table treat3 as select a.*, b.Cohort, b.event_year as event_year1
from p1d as a, treat2 as b
where a.gvkey=b.gvkey and a.year>=b.event_year-3 and a.year<=b.event_year+10; quit;

proc sort data=treat3 nodupkey; by gvkey  year cohort; run;

data treat3;
set treat3;
if year<event_year1 then post1=0;
if year>event_year1+3 then post1=1;
if year>=event_year1 and year<=event_year1+3 then delete;
run;

data control;
set p1d;
if treat_close_post=0;
if pre_treat=0;
run;

proc sql;
create table control1 as select a.*, b.event_year as event_year1
from control as a left join treat2 as b
on a.gvkey=b.gvkey  and a.year>=b.event_year-6 and a.year<=b.event_year+10; quit; 

data control2;
set control1;
if event_year1^=. then delete;
run;

data control2;
set control2;
drop event_year1;
run;

proc sql;
create table control3 as select a.*, b.cohort, b.event_year as event_year1
from control2 as a, cohort1 as b
where a.year>=b.event_year-3 and a.year<=b.event_year+10; quit;

proc sort data=control3 nodupkey; by gvkey  year cohort; run;
data control3;
set control3;
if year<event_year1 then post1=0;
if year>event_year1+3 then post1=1;
if year>=event_year1 and year<=event_year1+3 then delete;
treat1=0;
run;
data treat3;
set treat3;
treat1=1;
run;

data p1e;
set treat3 control3;
if post1^=.;
run;
data p1e;
set p1e;
treat_post1=treat1*post1;
lnrsei_score=log(avg_rsei+1); run;
 PROC EXPORT DATA=p1e
            FILE="X:\Project_EPA\data\peertrimf_firm.dta"
            DBMS=STATA REPLACE;
RUN;
